Chapter 3 Clip by Ecoregion

3.1 Location to save output

## default location:
save_default <- './clip_FIA/' 
## Alternatively, enter specific custom location:
save_custom <- 'C:/Users/JJGross/Documents/R_projects/FIA_data/clip_FIA/'

## Specify `save_location <- save_custom` or `save_location <- save_default`
save_location <- save_custom

## Create directory if it doesn't already exist
if (!dir.exists(save_location)) {dir.create(save_location)}

3.2 Load ‘Ecoregions’ spatial data

Load in a file containing hierarchical spatial categories, or levels, of polygons surrounding the Appalachian National Scenic Trail (more info on how this file was created can be found in APPA Forest Health FIA Narrative). The hierarchical spatial levels (from smallest to largest) include ecological subsections, ecological sections, and provinces (all collectively referred to as ecoregions here). The APPA Ecoregions dataset can be downloaded from IRMA: APPA_Ecoregions_USFS_HUC10_Shell_AEA

load_eco_path <- "C:/Users/JJGross/Downloads/APPA_Ecoregions_USFS_HUC10_Shell_AEA"

Read the shapefile using read_sf() from the sf (simple features) package.

eco_shapefile <- sf::read_sf(dsn = load_eco_path, 
                   layer = 'APPA_Ecoregions_USFS_HUC10_Shell_AEA')

3.2.1 Explore ecoregion levels:

plot(eco_shapefile["SUBSECTION"])

plot(eco_shapefile["SECTION_NA"])

## SUBSECTIONS:
n_distinct(eco_shapefile$SUBSECTION)
## [1] 50
levels(as.factor(eco_shapefile$SUBSECTION))
##  [1] "211Aa"  "211Ba"  "211Bb"  "211Da"  "211Ec"  "211Fc"  "211Fd"  "211Ia" 
##  [9] "221Ae"  "221Al"  "221Am"  "221Ba"  "221Bb"  "221Bd"  "221Da"  "221Db" 
## [17] "221Dc"  "221Dd"  "221De"  "221Ja"  "221Jb"  "221Jc"  "231Ab"  "231Ac" 
## [25] "231Ad"  "231Ag"  "231Ib"  "M211Ab" "M211Ac" "M211Ad" "M211Ae" "M211Af"
## [33] "M211Ag" "M211Ba" "M211Bb" "M211Bc" "M211Ca" "M211Cb" "M211Cc" "M211Cd"
## [41] "M221Aa" "M221Ab" "M221Ac" "M221Ad" "M221Be" "M221Cb" "M221Da" "M221Db"
## [49] "M221Dc" "M221Dd"
## SUBSECTI_1:
levels(as.factor(eco_shapefile$SUBSECTI_1)) 
##  [1] "Aroostook Hills"                         
##  [2] "Berkshire-Vermont Upland"                
##  [3] "Catskill Mountains"                      
##  [4] "Central Blue Ridge Mountains"            
##  [5] "Central Maine Embayment"                 
##  [6] "Central Maine Foothills"                 
##  [7] "Champlain Glacial Lake and Marine Plains"
##  [8] "Connecticut Lakes"                       
##  [9] "Eastern Allegheny Plateau"               
## [10] "Eastern Coal Fields"                     
## [11] "Gettysburg Piedmont Lowland"             
## [12] "Great Valley of Virginia"                
## [13] "Holston Valley"                          
## [14] "Hudson Highlands"                        
## [15] "Hudson Limestone Valley"                 
## [16] "Kittatinny-Shawangunk Ridges"            
## [17] "Lower Foot Hills"                        
## [18] "Lynchburg Belt"                          
## [19] "Mahoosic Rangely Lakes"                  
## [20] "Maine-New Brunswick Lowlands"            
## [21] "Maine Central Mountains"                 
## [22] "Metasedimentary Mountains"               
## [23] "Newark"                                  
## [24] "Northern Blue Ridge Mountains"           
## [25] "Northern Great Valley"                   
## [26] "Northern Green Mountain"                 
## [27] "Northern Piedmont"                       
## [28] "Northern Ridge and Valley"               
## [29] "Piedmont Ridge"                          
## [30] "Piedmont Upland"                         
## [31] "Pocono Plateau"                          
## [32] "Reading Prong"                           
## [33] "Ridge and Valley"                        
## [34] "Rolling Limestone Hills"                 
## [35] "Sandstone Hills"                         
## [36] "Schist Hills"                            
## [37] "Schist Plains"                           
## [38] "Sebago-Ossipee Hills and Plains"         
## [39] "Southern Blue Ridge Mountains"           
## [40] "Southern Green Mountain"                 
## [41] "Southern Piedmont"                       
## [42] "St. John Upland"                         
## [43] "Sunapee Uplands"                         
## [44] "Taconic Foothills"                       
## [45] "Taconic Mountains"                       
## [46] "Triassic Basins"                         
## [47] "Western Allegheny Mountain and Valley"   
## [48] "Western Maine Foothills"                 
## [49] "White Mountains"
n_distinct(eco_shapefile$SUBSECTI_1)
## [1] 49
## Note that 'SUBSECTI_1' names are not unique - there are two different "Northern Piedmont"
eco_shapefile %>%
  filter(SUBSECTI_1 == "Northern Piedmont") %>%
  select(SUBSECTI_1, SUBSECTION, SECTION_NA, PROVINCE_N) %>%
  sf::st_drop_geometry() 
## # A tibble: 2 × 4
##   SUBSECTI_1        SUBSECTION SECTION_NA                    PROVINCE_N         
## * <chr>             <chr>      <chr>                         <chr>              
## 1 Northern Piedmont M211Ba     New England Piedmont          Adirondack-New Eng…
## 2 Northern Piedmont 221De      Northern Appalachian Piedmont Eastern Broadleaf …
eco_shapefile <- eco_shapefile %>%
  mutate(SUBSECTI_1 = case_when(
    SUBSECTI_1 == "Northern Piedmont" & SUBSECTION == "M211Ba" ~ "New Engl. Northern Piedmont",
    SUBSECTI_1 == "Northern Piedmont" & SUBSECTION == "221De" ~ "Appal. Northern Piedmont",
    .default = as.character(SUBSECTI_1)))

Note that SUBSECTI_1 names are not unique. There are two different subsections both named “Northern Piedmont”. See current re-name solution above.

## Distinct SECTION_NA:
n_distinct(eco_shapefile$SECTION_NA)
## [1] 19
levels(as.factor(eco_shapefile$SECTION_NA))
##  [1] "Allegheny Mountains"                         
##  [2] "Aroostook Hills and Lowlands"                
##  [3] "Blue Ridge Mountains"                        
##  [4] "Catskill Mountains"                          
##  [5] "Central Appalachian Piedmont"                
##  [6] "Central Maine Coastal and Embayment"         
##  [7] "Central Ridge and Valley"                    
##  [8] "Green-Taconic-Berkshire Mountains"           
##  [9] "Hudson Valley"                               
## [10] "Lower New England"                           
## [11] "Maine - New Brunswick Foothills and Lowlands"
## [12] "New England Piedmont"                        
## [13] "Northern Appalachian Piedmont"               
## [14] "Northern Cumberland Mountains"               
## [15] "Northern Glaciated Allegheny Plateau"        
## [16] "Northern Ridge and Valley"                   
## [17] "Southern Appalachian Piedmont"               
## [18] "St. Lawrence and Champlain Valley"           
## [19] "White Mountains"
## Distinct PROVINCE_N:
n_distinct(eco_shapefile$PROVINCE_N)
## [1] 5
levels(as.factor(eco_shapefile$PROVINCE_N))
## [1] "Adirondack-New England Mixed Forest--Coniferous Forest--Alpine Meadow"
## [2] "Central Appalachian Broadleaf Forest-Coniferous Forest-Meadow"        
## [3] "Eastern Broadleaf Forest"                                             
## [4] "Northeastern Mixed Forest"                                            
## [5] "Southeastern Mixed Forest"

3.3 Read FIA data into R

Load the FIA data stored locally (From “Download Chapter) into R session.

## load from default location (specified in 01-Download.Rmd chapter):
load_default <- './download_FIA/' 
## Alternatively, enter specific custom location:
load_custom <- 'C:/Users/JJGross/Documents/R_projects/FIA_data/allStates'

load_location <- load_custom
#load_location <- load_default

## Change number of processing cores used, if desired
cores <- parallel::detectCores(logical = FALSE)-2 

Read the FIA data stored locally (From “Download Chapter) using the ReadFIA() function.

## Specify the 13 APPA states by state code (this is extra insurance that the wrong states are not loaded, if other states have been downloaded to same directory)
at_states <- c('CT', 'GA', 'ME', 'MD', 'MA', 
               'NH', 'NJ', 'NY', 'NC', 'PA', 
               'TN', 'VT', 'VA')

## Read FIA data 
at_states_FIA <- readFIA(dir = load_location, states = at_states, nCores = cores)

3.4 Clip FIA plots by Ecoregion

clipFIA() performs space-time queries on Forest Inventory and Analysis Database (FIADB). clipFIA subsets the dataset to include only data associated with particular inventory years (i.e., most recent), and/or only data within a user-defined region.

Spatially, the mask = eco argument below selects only the at_states_FIA FIA plots which fall within the perimeter of the eco spatial extent (i.e. the outer boundary of the ecoregion polygons).

Temporally, the mostRecent argument returns only data for most recent inventory if set to TRUE

## Restrict to most recent inventory
at_FIA_MR <- clipFIA(at_states_FIA, matchEval = TRUE, mostRecent = TRUE, mask = eco_shapefile)
saveRDS(at_FIA_MR, file = paste0(save_location, "at_FIA_MR.rds"))

## Access all inventories
at_FIA <- clipFIA(at_states_FIA, matchEval = TRUE, mostRecent = FALSE, mask = eco_shapefile)
saveRDS(at_FIA, file = paste0(save_location, "at_FIA.rds"))

Note the argument matchEval = TRUE in the above function. The clipFIA() documentation states that:

“if matchEval = TRUE, clipFIA() returns a subset of data for which there are matching reporting years across states. Only useful if db contains multiple state subsets of the FIA database.”

This seems appropriate for the Appalachian Trail dataset, but when compared (12 March 2024), both matchEval = TRUE and matchEval = FALSE appeared to result in the same output. This may have something to do with the method that is used (e.g. ‘Annual’“’ vs ‘TI’). Further evaluation is needed.

Note that currently matchEval argument is set to matchEval = TRUE for both at_FIA and at_FIA_MR. Confirm this is appropriate.

3.4.1 Ecosubsections

It is important to note the column ECOSUBCD which is part of the native FIA database and referenced in the FIA Database User Guide. The ECOSUBCD column can be included in the output of most rFIA functions with the argument grpBy = c(ECOSUBCD). The FIA database guide states that subsection codes for the conterminous United States were developed as part of the “Ecological Subregions: Sections and Subsections for the Conterminous United States” publication (Cleland et al. 2007).

Conversely, the same ecoregional subsection code can be found within the APPA_Ecoregions_USFS_HUC10_Shell_AEA shapefile column eco$SUBSECTION along with additional related columns - for example, the full subsection name in the SUBSECTI_1 column. These columns (and others added below) are appended to the FIA dataset during most rFIA functions by way of the polys = eco. For this reason the polys = ecoargument is often more advantageous than the grpBy = c(ECOSUBCD) and the polys = ecoargument is utilized throughout the make.Rmd chapter.

Note that this polys = eco argument is different than the rFIA::clipFIA(mask = eco) function/argument which only clips the plot data by the eco mask (outer spatial boundary) and does not append any data.

3.5 Supporting tables

3.5.1 Appalachian trail spatial centerline data

## Manually downloaded `Appalachian_National_Scenic_Trail.shp` from `APPAForesthealthReport` repo. 
centerline_shapefile_location <- "move_to_server/at_centerline/" 

## read in shapefile of Appalachian trail center-line
 at_centerline <- sf::st_read(paste0(centerline_shapefile_location, "Appalachian_National_Scenic_Trail.shp")) %>%
   mutate(region_3cl = case_when(
     Region == "New England" ~ "Northeast",
     Region == "Mid-Atlantic" ~ "Mid-Atlantic",
     Region %in% c("Southern", "Virginia") ~ "Southeast"
   ))

saveRDS(at_centerline, paste0(save_location, "at_centerline.rds"))

Appalachian_National_Scenic_Trail.shp currently saved within APPAForesthealthReport repo. In future move this to appropriate location (e.g. IRMA).

3.5.2 APPA FIA plots

Note that the rFIA::tpa() function was used below to calculate and construct tables for APPA FIA plot-level attributes (e.g. elevation, etc.). TPA was selected because it is a core metric of FIA methods and should be present in all field visited plots.

## Get plot-level data for the most recent tpa dataset
tpa_plots_MR <- rFIA::tpa(at_FIA_MR, 
                          byPlot = TRUE, 
                          grpBy = c(STATECD, ECOSUBCD, ELEV),
                          returnSpatial = TRUE) 

## Get plot-level data for full tpa dataset  
tpa_plots <- rFIA::tpa(at_FIA, 
                       byPlot = TRUE, 
                       grpBy = c(STATECD, ECOSUBCD, ELEV),
                       returnSpatial = TRUE) 

## Get only the unique plot locations (pltID)
## i.e. exclude the revisits 
tpa_plots_by_year <- tpa_plots |>
  # HAVE TO DO BIND ROWS HERE BECAUSE `tpa_plots' SUPRISINGLY DOES NOT 
  # CONTAIN ALL PLOTS FOUND IN `tpa_plots_MR'.
  # Error with rFIA discussed below
  dplyr::bind_rows(tpa_plots_MR) |>
  dplyr::group_by(PLT_CN) |> #PLT_CN is each unique event, while pltID is each unique plot location
  distinct()

## Get only the unique plot locations (pltID)
## i.e. exclude the revisits 
tpa_plots_distinct <- tpa_plots_by_year |>
  select(pltID, STATECD, ECOSUBCD, ELEV, geometry) %>%
  distinct()

Display APPA FIA plots by eco-subsection and southern most plot used to calculated distance.

## Select Southern-most plot
most_S_plot <- tpa_plots_distinct %>%
  filter(pltID == "5_13_85_5")

## ecosubsection colors
ecosubCol <- colorFactor(rainbow(51), tpa_plots_distinct$ECOSUBCD)
## leaflet map
tpa_plots_distinct %>%  
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    color = ~ecosubCol(ECOSUBCD),
    stroke = FALSE, fillOpacity = 1,
    radius = 4) %>%
  addMarkers(data = most_S_plot)

Calculate each plot’s distance from southern point for use in displaying data as transect along axis.

tpa_plots_distance <- tpa_plots_distinct |>
  mutate(S_plot = most_S_plot$geometry) |> # add south point
  mutate(dist_unit = sf::st_distance(
    S_plot, geometry, by_element = TRUE)) |> # calculate distance
  mutate(distance_m = round(as.numeric(dist_unit))) |>
  select(-S_plot, -dist_unit)

## Create a continuous palette function
dist_pal <- colorNumeric(palette = "Blues", domain = tpa_plots_distance$distance_m)
## leaflet map
tpa_plots_distance %>%  
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    color = ~dist_pal(distance_m),
    stroke = FALSE, fillOpacity = 1,
    radius = 4) %>%
  addMarkers(data = most_S_plot)

Demo usage of plot attributes

plot_attributes <- tpa_plots_distance

# Demo plot distance x elevation
plot_attributes |>
  group_by(ECOSUBCD)|>
  mutate(median_dist = median(distance_m)) |>
  mutate(median_elev = median(ELEV)) |>
  ggplot(aes(x= fct_reorder(ECOSUBCD, distance_m), 
             y=ELEV, 
             fill = as.factor(median_elev))) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = 'none') +
  xlab("Ecosubsection (from SW to NE)") + ylab("Plot Elevation") 

3.5.2.1 Check Plot Counts

Perform some checks on records within tpa dataset - helps gauge dataset changes. Can also add quality assurance checks here:

## Check that entire dataset (at_plots) contains most recent dataset (at_plots_MR) 
tpa_chk <- sf::st_drop_geometry(tpa_plots)
tpa_chk_MR <- sf::st_drop_geometry(tpa_plots_MR)

MRplots_not_in_full_tpa_dataset <- anti_join(tpa_chk_MR, tpa_chk, by = join_by(pltID))
if (nrow(MRplots_not_in_full_tpa_dataset) != 0) {
  warning("tpa_plots does not contain all plots found in tpa_plots_MR (most recent) dataset")
}
## Warning: tpa_plots does not contain all plots found in tpa_plots_MR (most
## recent) dataset
## total events/visits
tpa_event_count <- nrow(tpa_chk) 
tpa_event_count # 12,457
## [1] 12457
## total plot locations
tpa_plot_count <- length(unique(tpa_chk$pltID)) 
tpa_plot_count # 4,328
## [1] 4328
tpa_plots_summary <- tpa_chk |>
  group_by(pltID) |>
  summarise(visits = n()) |>
  group_by(visits) |>
  summarize(n_plots = n())

tpa_plots_summary
## # A tibble: 5 × 2
##   visits n_plots
##    <int>   <int>
## 1      1     571
## 2      2     781
## 3      3    1590
## 4      4    1376
## 5      5      10
## In 2022 
#      1     571 (571 plots measured once)
#      2     781 (781 plots measured twice)
#      3    1590
#      4    1376
#      5      10

## total events/visits
tpa_event_count_MR <- nrow(tpa_chk_MR) 
tpa_event_count_MR # 3,980
## [1] 3980
## total plot locations
tpa_plot_count_MR <- length(unique(tpa_chk_MR$pltID)) 
tpa_plot_count_MR # 3,980
## [1] 3980
if (tpa_event_count_MR != tpa_plot_count_MR) {
  warning("some plots not unique in most recent (MR) dataset")
}

Possible error with rFIA: tpa_plots (full dataset) does not contain all plots found in tpa_plots_MR (most recent) dataset” see MRplots_not_in_full_tpa_dataset for table of missing plots.

3.5.3 Subsections

3.5.3.1 Check subsections

tpa_ECOSUBCD_distinct <- tpa_plots_distinct |>
  sf::st_drop_geometry() |>
  distinct(ECOSUBCD)

## Eco shapefile:
eco <- eco_shapefile %>%
  full_join(tpa_ECOSUBCD_distinct, by = c("SUBSECTION" = "ECOSUBCD")) %>%
  sf::st_transform('+proj=longlat +datum=WGS84')

## return all rows from eco_shapefile without a match in FIA_plots
## These are subsections with no FIA plots:
dplyr::anti_join(eco, tpa_ECOSUBCD_distinct, by = join_by(SUBSECTION == ECOSUBCD))
## Simple feature collection with 3 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -83.99575 ymin: 34.51386 xmax: -80.44353 ymax: 37.65403
## Geodetic CRS:  +proj=longlat +datum=WGS84
## # A tibble: 3 × 16
##      AREA PERIMETER ECOMAP_200 ECOMAP_201 PROVINCE SECTION SUBSECTION DOMAIN_NAM
##     <dbl>     <dbl>      <int>      <int> <chr>    <chr>   <chr>      <chr>     
## 1  7.42e8   436121.       1232       1381 M221     M221B   M221Be     Humid Tem…
## 2  3.99e4  1265905.       1480       1590 221      221J    221Ja      Humid Tem…
## 3  2.10e6   415055.       1688       1746 231      231A    231Ad      Humid Tem…
## # ℹ 8 more variables: DIVISION_N <chr>, PROVINCE_N <chr>, SECTION_NA <chr>,
## #   SUBSECTI_1 <chr>, ATIntersec <int>, Acres <dbl>, Hectares <dbl>,
## #   geometry <MULTIPOLYGON [°]>
## return all rows from FIA_plots without a match in eco_shapefile
## These rows are locations with FIA plots but no subsection included in shapefile:
eco %>% filter(is.na(SECTION)) 
## Simple feature collection with 1 feature and 15 fields (with 1 geometry empty)
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  +proj=longlat +datum=WGS84
## # A tibble: 1 × 16
##    AREA PERIMETER ECOMAP_200 ECOMAP_201 PROVINCE SECTION SUBSECTION DOMAIN_NAM
## * <dbl>     <dbl>      <int>      <int> <chr>    <chr>   <chr>      <chr>     
## 1    NA        NA         NA         NA <NA>     <NA>    M211Aa     <NA>      
## # ℹ 8 more variables: DIVISION_N <chr>, PROVINCE_N <chr>, SECTION_NA <chr>,
## #   SUBSECTI_1 <chr>, ATIntersec <int>, Acres <dbl>, Hectares <dbl>,
## #   geometry <MULTIPOLYGON [°]>

3.5.3.2 Missing eco$SUBSECTION info

Note that eco shapefile is missing information for SUBSECTION M211Aa - International Boundary Plateau. This needs to be fixed in original shapefile.

Temporary fix here:

## missing info found by google search of "M211Aa"
# https://www.fs.usda.gov/research/publications/misc/73326-wo-gtr-76d-cleland2007.pdf
missing_subsection <- data.frame(
  PROVINCE = "M211", 
  SECTION = "M211A", 
  SUBSECTION = "M211Aa", 
  SECTION_NA = "White Mountains",
  SUBSECTI_1 = "International Boundary Plateau")

eco <- eco |>
  filter(SUBSECTION != "M211Aa") |>
  bind_rows(missing_subsection)

3.5.3.3 Calculate n plots

Calculate plots per subsection and median elevation and distance for all and most recent (MR) datasets

# calculate plots per subsection
plot_attributes_drop_geo <- plot_attributes %>%
  sf::st_drop_geometry()

n_plots_tpa_MR <- tpa_plots_MR |>
  sf::st_drop_geometry() |>
  left_join(plot_attributes_drop_geo, 
            by = join_by(pltID, STATECD, ECOSUBCD, ELEV)) |>
  group_by(ECOSUBCD) |>
  summarize(n_plots_subsect_MR = n(),
            median_elevation_MR = median(ELEV),
            median_distance_MR = median(distance_m))

n_plots_tpa <- tpa_plots |>
  sf::st_drop_geometry() |>
  left_join(plot_attributes_drop_geo, 
            by = join_by(pltID, STATECD, ECOSUBCD, ELEV)) |>
  group_by(ECOSUBCD) |>
  summarize(n_plots_subsect = n(),
            median_elevation = median(ELEV),
            median_distance = median(distance_m))

3.5.3.4 Add states and subsection abbreviations

Shorter subsection names with states they occur in.

## Read in state code names
state_codes <- read_csv("move_to_server/us-state-ansi-fips.csv", show_col_types = FALSE) %>%
  mutate(st = as.integer(st))

## Get all subsections in one dataset (subsections from FIA + eco shapefile)
## Have to join because these can be different if subsection missing from eco shapefile
## or FIA data not collected for a subsection
subsections_per_state <- plot_attributes_drop_geo |>
  select(STATECD, ECOSUBCD) |>
  distinct() |>
  right_join(eco, by = join_by(ECOSUBCD == SUBSECTION))

subsections_all <- subsections_per_state |>
  select(-STATECD) |>
  group_by(ECOSUBCD) |>
  distinct()

## Get list of which states a subsection occurs - will help with data viz interpretation later
subsection_state_list <- subsections_per_state |>
  select(STATECD, ECOSUBCD) |>
  distinct() |>
  left_join(state_codes, by = join_by(STATECD == st)) |>
  select(-STATECD, -stname) |>
  arrange(stusps) |>
  pivot_wider(names_from = stusps, values_from = stusps) |>
  unite("states", CT:VT, remove = FALSE, na.rm = TRUE, sep = " ") |>
  mutate(states = paste0("(",states,")")) |>
  select(ECOSUBCD, states)

## Abbrivations to shorten subsection names
replace_list <- (c("Mountain" = "Mt", 
                  "Central" = "C.", 
                  "Northern" = "N.",
                  "Southern" = "S.",
                  "Eastern" = "E.",
                  "New Brunswick" = "NB",
                  "Lowlands" = "Lowl.",
                  "Lowland" = "Lowl.",
                  "Uplands" = "Upl.",
                  "Upland" = "Upl.",
                  "Plateau" = "Plat.",
                  "Piedmont" = "Pdmt",
                  "Blue" = "Bl.",
                  "Ridge" = "Rdg",
                  "Valley" = "Vly",
                  "Maine" = "ME",
                  "Connecticut" = "CT",
                  "Vermont" = "VT",
                  "Virginia" = "VA",
                  "Western" = "W.",
                  "Glacial Lake and Marine Plains" = "GL/MP",
                  "Hills and Plains" = "",
                  "Kittatinny-Shawangunk" = "Kitt-Shaw",
                  "Metasedimentary" = "Meta",
                  "Mahoosic Rangely Lakes" = "Mahoo/Rng Lks",
                  "Highlands" = "Highl.",
                  "Hudson" = "Hud.",
                  "Limestone" = "Limstn",
                  "Gettysburg" = "Getty",
                  "Sebago-Ossipee" = "Seb-Ossipee",
                  "Berkshire" = "Berk",
                  "Embayment" = "Embaymt",
                  "International" = "Int.",
                  "Plateau" = "Plat."
                  ))

subsections_abbr <- subsections_all |>
  left_join(subsection_state_list, by = join_by(ECOSUBCD)) |>
  mutate(SUBSECT_ABBR = str_replace_all(SUBSECTI_1, replace_list)) |>
  unite("SUBSECT_ABBR_ST", SUBSECT_ABBR:states, remove = FALSE, na.rm = TRUE, sep = " ") 

3.5.4 Save subsection attributes

# append n plot counts for entire dataset and most recent
subsection_attributes <- subsections_abbr |>
  left_join(n_plots_tpa, by = join_by(ECOSUBCD)) |>
  left_join(n_plots_tpa_MR, by = join_by(ECOSUBCD))

saveRDS(subsection_attributes, file = paste0(save_location, "subsection_attributes.rds"))

3.5.5 Save centroids

section_centroids <- subsection_attributes |>
  sf::st_as_sf() |>
  group_by(SECTION_NA) |>
  summarise(geometry = sf::st_union(geometry)) |>
  sf::st_centroid() |>
  mutate(X = sf::st_coordinates(geometry)[,1],
         Y = sf::st_coordinates(geometry)[,2])
## Warning: st_centroid assumes attributes are constant over geometries
str(section_centroids)
## sf [19 × 4] (S3: sf/tbl_df/tbl/data.frame)
##  $ SECTION_NA: chr [1:19] "Allegheny Mountains" "Aroostook Hills and Lowlands" "Blue Ridge Mountains" "Catskill Mountains" ...
##  $ geometry  :sfc_POINT of length 19; first list element:  'XY' num [1:2] -80.8 37.5
##  $ X         : num [1:19] -80.8 -68.6 -81.9 -74.5 -79.4 ...
##  $ Y         : num [1:19] 37.5 45.9 36.4 42 37.4 ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA
##   ..- attr(*, "names")= chr [1:3] "SECTION_NA" "X" "Y"
subsection_centroids <- subsection_attributes |>
  sf::st_as_sf() |>
  sf::st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
## leaflet map
ecosubCol <- colorFactor(rainbow(51), subsection_attributes$SUBSECT_ABBR)

subsection_attributes |> 
  sf::st_as_sf() |>
  leaflet() |>
  addTiles() |>
  addPolygons(color = ~ecosubCol(SUBSECT_ABBR), label = ~SUBSECT_ABBR) |>
  addCircleMarkers(data = subsection_centroids, 
                   color = ~ecosubCol(SUBSECT_ABBR),
                   stroke = FALSE, fillOpacity = 1,
                   radius = 10, label = ~SUBSECT_ABBR) |>
  addCircleMarkers(data = section_centroids, 
                   color = "black", 
                   stroke = FALSE, fillOpacity = 1,
                   radius = 10, label = ~SECTION_NA)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
saveRDS(section_centroids, file = paste0(save_location, "section_centroids.rds"))
saveRDS(subsection_centroids, file = paste0(save_location, "subsection_centroids.rds"))

A centroid file was in original repo - but not sure how it was created - took a stab here. Check map above to see if method was acceptable.

3.5.6 Save plot attributes

# create list of abbreviated subsection to append to plot attributes table
sub_attributes_for_plots <- subsection_attributes %>%
  select(ECOSUBCD, PROVINCE, SECTION, DOMAIN_NAM, DIVISION_N, PROVINCE_N,
         SECTION_NA, SUBSECTI_1,ATIntersec,
         SUBSECT_ABBR, SUBSECT_ABBR_ST, states)

## Plot attributes unique locations  
plot_attributes_final <- plot_attributes |>
  right_join(sub_attributes_for_plots, by = join_by(ECOSUBCD)) |>
  select(-STATECD) 

## Plot attributes by year
plot_attributes_drop_geom <- plot_attributes_final |>
  sf::st_drop_geometry()
plot_attributes_by_year_final <- tpa_plots_by_year |>
  select(YEAR, pltID) |>
  left_join(plot_attributes_drop_geom, by = join_by(pltID))

saveRDS(plot_attributes_final, file = paste0(save_location, "plot_attributes_locations.rds")) 
saveRDS(plot_attributes_by_year_final, file = paste0(save_location, "plot_attributes_by_year.rds")) 

3.5.6.1 join subsection attributes to eco

eco_final <- eco |>
  left_join(subsection_attributes)

saveRDS(eco_final, file = paste0(save_location, "eco.rds")) 

3.6 Check plot attributes final

Note the new Ecosubsection names and ability to detect subsections without FIA plots

# Visualize plot distance x elevation with plot_attributes_final
plot_attributes_final_graph <- na.omit(plot_attributes_final)

plots_na <- plot_attributes_final |>
  filter(is.na(pltID)) |>
  pull(SUBSECT_ABBR)
plots_na <- paste("Subsections without plot data:", paste(plots_na, collapse = ", "))

plot_attributes_final_graph |>
  group_by(ECOSUBCD)|>
  mutate(median_dist = median(distance_m)) |>
  mutate(median_elev = median(ELEV)) |>
  ggplot(aes(x= fct_reorder(SUBSECT_ABBR_ST, distance_m), 
             y=ELEV, 
             fill = as.factor(median_elev))) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = 'none') +
  xlab("Ecosubsection (from SW to NE)") + ylab("Plot Elevation") +
  labs(caption = plots_na)

## Vizualize plots per year

# Visualize number of plots per subsection per year
n_plots_per_year <- tpa_plots |>
  sf::st_drop_geometry() |>
  group_by(ECOSUBCD, YEAR) |>
  summarize(n_plots_per_year = n(), .groups = 'drop') |>
  right_join(subsection_attributes, by = join_by(ECOSUBCD))

n_plots_per_year_graph <- na.omit(n_plots_per_year)

n_plots_per_year_graph %>%
  mutate(highlight = case_when(n_plots_per_year == 1 ~ "1 plot",
                               n_plots_per_year == 2 ~ "2 plots",
                               .default = NA)) %>%
  ggplot(aes(x=forcats::fct_reorder(SUBSECT_ABBR_ST, median_distance), 
             y=YEAR, 
             size = n_plots_per_year, 
             color = highlight)) +
  geom_count() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  scale_size_area() +
  scale_color_manual(values = c("red2", "cyan3", "grey50"),
                     breaks = c("1 plot", "2 plots")) +
  xlab("Ecosubsection (from SW to NE)") +
  labs(caption = plots_na)